setwd("~/HOT_R_working_directory")
save.image(file = "HOT_anal-4Jul.RData")
load("~/HOT_R_working_directory/HOT_anal-4Jul.RData")
# This will force R to give the whole digits, not scientific notation
options("scipen"=100, "digits"=4)
library(edgeR)
## Loading required package: limma
library(reshape2)
library(ggplot2)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
library(plyr)
library(UpSetR)
##
## Attaching package: 'UpSetR'
## The following object is masked from 'package:lattice':
##
## histogram
########## Supplimental figure 1 : CTD Data
Data = read.csv("CTD_ALL_averages.csv", header = T)
head(Data)
## Depth Temp_268 Temp_271 Temp_275 Temp_279 NO3_268 NO3_271 NO3_275 NO3_279
## 1 0 24.44 24.13 27.09 26.01 4.933 1.4044 2.514 4.276
## 2 2 24.64 24.22 27.10 26.01 4.627 1.2927 2.506 4.186
## 3 4 24.64 24.22 27.11 26.00 4.294 0.8582 2.618 3.941
## 4 6 24.64 24.22 27.11 26.01 4.107 0.7019 2.243 3.865
## 5 8 24.64 24.22 27.11 26.01 4.338 0.7934 2.270 3.694
## 6 10 24.64 24.22 27.10 26.01 4.049 0.8162 2.244 3.825
## O2_268 O2_271 O2_275 O2_279 Chl_268 Chl_271 Chl_275 Chl_279 Sal_268 Sal_271
## 1 208.2 210.0 201.0 202.5 0.08154 0.1210 0.0007 0.1571 35.33 35.14
## 2 208.2 209.9 201.0 202.5 0.19695 0.1477 0.0007 0.1571 35.33 35.14
## 3 208.2 210.2 201.0 202.8 0.24758 0.1063 0.0698 0.1667 35.33 35.14
## 4 208.3 210.2 200.9 202.7 0.25912 0.1134 0.0751 0.1778 35.33 35.13
## 5 208.0 210.0 201.2 202.7 0.25446 0.1131 0.0735 0.1762 35.33 35.13
## 6 208.1 210.1 201.3 202.7 0.26717 0.1032 0.0754 0.1650 35.33 35.13
## Sal_275 Sal_279
## 1 35.06 35
## 2 35.06 35
## 3 35.06 35
## 4 35.06 35
## 5 35.06 35
## 6 35.06 35
#qplot(Data$DBAR, Data$Temp_avg, geom = "line", )
Temps = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Temp_268, colour = "Temp_268")) + geom_line(aes(y = Temp_271, colour = "Temp_271")) + geom_line(aes(y = Temp_275, colour = "Temp_275")) + geom_line(aes(y = Temp_279, colour = "Temp_279")) + labs(y = "Temperature (ËšC)", x = "Depth (m)")
Oxygen = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = O2_268, colour = "O2_268")) + geom_line(aes(y = O2_271, colour = "O2_271")) + geom_line(aes(y = O2_275, colour = "O2_275")) + geom_line(aes(y = O2_279, colour = "O2_279")) + labs(y = "Oxygen (uM/L)", x = "Depth (m)")
Chlphyl = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Chl_268, colour = "Chl_268")) + geom_line(aes(y = Chl_271, colour = "Chl_271")) + geom_line(aes(y = Chl_275, colour = "Chl_275")) + geom_line(aes(y = Chl_279, colour = "Chl_279")) + labs(y = "Chl-a (ug/L)", x = "Depth (m)")
Salin = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Sal_268, colour = "Sal_268")) + geom_line(aes(y = Sal_271, colour = "Sal_271")) + geom_line(aes(y = Sal_275, colour = "Sal_275")) + geom_line(aes(y = Sal_279, colour = "Sal_279")) + labs(y = "Salinity (pss)", x = "Depth (m)")
Nitrate = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = NO3_268, colour = "NO3_268")) + geom_line(aes(y = NO3_271, colour = "NO3_271")) + geom_line(aes(y = NO3_275, colour = "NO3_275")) + geom_line(aes(y = NO3_279, colour = "NO3_279")) + labs(y = "Nitrate (uM/L)", x = "Depth (m)")
#Import ASV table
ASV_table<-read.table("Raw_table_for_manu.txt",header=TRUE, comment.char = "")
Header = c("ASV.ID","5m.Dec2014","25m.Dec2014","45m.Dec2014","75m.Dec2014","100m.Dec2014","125m.Dec2014","150m.Dec2014","175m.Dec2014","300m.Dec2014","400m.Dec2014","500m.Dec2014","770m.Dec2014","5m.Apr","25m.Apr","75m.Apr","100m.Apr","125m.Apr","150m.Apr","175m.Apr","300m.Apr","400m.Apr","500m.Apr","770m.Apr","5m.Aug","25m.Aug","45m.Aug","75m.Aug","100m.Aug","125m.Aug","150m.Aug","175m.Aug","300m.Aug","400m.Aug","500m.Aug","770m.Aug","5m.Dec2015","25m.Dec2015","45m.Dec2015","75m.Dec2015","100m.Dec2015","125m.Dec2015","150m.Dec2015","175m.Dec2015","300m.Dec2015","400m.Dec2015","500m.Dec2015","770m.Dec2015","Taxonomy")
colnames(ASV_table) = Header #Swap the current header for the custom header for columnames
rownames(ASV_table) = ASV_table$ASV.ID #Here I want to give the rownames the ASV IDs
#dim(ASV_table) #Check the dimensions. There are 11538rows and 49 columns
#Calculate the total number of sequence reads
Global_Seq_total = sum(colSums(ASV_table[,2:48]))
#there are 2,598,281 total reads in the dataset
##Filter out OTUs with only 1 sequence in the whole dataset (global singletons)
ASV_total<-apply(ASV_table[2:48],1,sum) #here we are summing each row in the dataset, where each row represents an ASV and its abundances across the different samples (columns)
#apply(ASV_table[2:48], 2, sum)
ASV_table.no1 = ASV_table[ ASV_total>1, ] #Here we are retaining only ASVs that have more than one sequence
removed = dim(ASV_table)[1] - dim(ASV_table.no1)[1] #The number of singleton ASVs lost in the previous step
# 343 singleton ASVs were removed
########### Sequence Analysis and plots ############
Data_cols = ASV_table.no1[,2:48]
sequence_total<-apply(Data_cols,2,sum) #number of sequences per sample(column)
ASV_count<-colSums(Data_cols>0) #total number of OTUs
ASV_singleton<-colSums(Data_cols==1) #Number of singleton OTUs
ASV_doubleton<-colSums(Data_cols==2) #Number of doubleton OTUs
ASV_else<-colSums(Data_cols>2) #Number of OTUs with more than 2 sequences
#dataframe with OTU stats per sample
ASVsample_info<-data.frame(sequence_total,ASV_count,ASV_singleton,ASV_doubleton,ASV_else)# Compile
#Plot it
ASVsample_info$samples<-row.names(ASVsample_info)
ASVsample_info$samples = factor(ASVsample_info$samples, levels = ASVsample_info$samples) ##***** this trick is to keep the order on the X axis
Melted<-melt(ASVsample_info) #Melt to long format part of the reshape package
## Using samples as id variables
#head(Melted)
Melted$Figure<-"Sequences"
Melted$Figure[Melted$variable == "ASV_count"]<-"Total ASVs"
ASVdist<-c("ASV_singleton", "ASV_doubleton", "ASV_else")
Melted$Figure[Melted$variable %in% ASVdist]<-"Breakdown of ASVs"
Melted
## samples variable value Figure
## 1 5m.Dec2014 sequence_total 86504 Sequences
## 2 25m.Dec2014 sequence_total 64988 Sequences
## 3 45m.Dec2014 sequence_total 55678 Sequences
## 4 75m.Dec2014 sequence_total 87424 Sequences
## 5 100m.Dec2014 sequence_total 53931 Sequences
## 6 125m.Dec2014 sequence_total 50649 Sequences
## 7 150m.Dec2014 sequence_total 68535 Sequences
## 8 175m.Dec2014 sequence_total 54520 Sequences
## 9 300m.Dec2014 sequence_total 132084 Sequences
## 10 400m.Dec2014 sequence_total 80535 Sequences
## 11 500m.Dec2014 sequence_total 78374 Sequences
## 12 770m.Dec2014 sequence_total 101613 Sequences
## 13 5m.Apr sequence_total 57199 Sequences
## 14 25m.Apr sequence_total 62103 Sequences
## 15 75m.Apr sequence_total 48195 Sequences
## 16 100m.Apr sequence_total 50921 Sequences
## 17 125m.Apr sequence_total 55271 Sequences
## 18 150m.Apr sequence_total 53244 Sequences
## 19 175m.Apr sequence_total 43776 Sequences
## 20 300m.Apr sequence_total 44903 Sequences
## 21 400m.Apr sequence_total 85980 Sequences
## 22 500m.Apr sequence_total 74359 Sequences
## 23 770m.Apr sequence_total 79572 Sequences
## 24 5m.Aug sequence_total 97523 Sequences
## 25 25m.Aug sequence_total 83069 Sequences
## 26 45m.Aug sequence_total 76894 Sequences
## 27 75m.Aug sequence_total 104836 Sequences
## 28 100m.Aug sequence_total 50420 Sequences
## 29 125m.Aug sequence_total 24246 Sequences
## 30 150m.Aug sequence_total 30023 Sequences
## 31 175m.Aug sequence_total 32852 Sequences
## 32 300m.Aug sequence_total 26720 Sequences
## 33 400m.Aug sequence_total 33464 Sequences
## 34 500m.Aug sequence_total 29631 Sequences
## 35 770m.Aug sequence_total 32917 Sequences
## 36 5m.Dec2015 sequence_total 44278 Sequences
## 37 25m.Dec2015 sequence_total 60138 Sequences
## 38 45m.Dec2015 sequence_total 33859 Sequences
## 39 75m.Dec2015 sequence_total 32924 Sequences
## 40 100m.Dec2015 sequence_total 31289 Sequences
## 41 125m.Dec2015 sequence_total 23897 Sequences
## 42 150m.Dec2015 sequence_total 27516 Sequences
## 43 175m.Dec2015 sequence_total 29800 Sequences
## 44 300m.Dec2015 sequence_total 30907 Sequences
## 45 400m.Dec2015 sequence_total 23215 Sequences
## 46 500m.Dec2015 sequence_total 24524 Sequences
## 47 770m.Dec2015 sequence_total 42638 Sequences
## 48 5m.Dec2014 ASV_count 768 Total ASVs
## 49 25m.Dec2014 ASV_count 682 Total ASVs
## 50 45m.Dec2014 ASV_count 608 Total ASVs
## 51 75m.Dec2014 ASV_count 780 Total ASVs
## 52 100m.Dec2014 ASV_count 746 Total ASVs
## 53 125m.Dec2014 ASV_count 658 Total ASVs
## 54 150m.Dec2014 ASV_count 817 Total ASVs
## 55 175m.Dec2014 ASV_count 552 Total ASVs
## 56 300m.Dec2014 ASV_count 1173 Total ASVs
## 57 400m.Dec2014 ASV_count 935 Total ASVs
## 58 500m.Dec2014 ASV_count 817 Total ASVs
## 59 770m.Dec2014 ASV_count 878 Total ASVs
## 60 5m.Apr ASV_count 648 Total ASVs
## 61 25m.Apr ASV_count 665 Total ASVs
## 62 75m.Apr ASV_count 747 Total ASVs
## 63 100m.Apr ASV_count 848 Total ASVs
## 64 125m.Apr ASV_count 727 Total ASVs
## 65 150m.Apr ASV_count 768 Total ASVs
## 66 175m.Apr ASV_count 686 Total ASVs
## 67 300m.Apr ASV_count 744 Total ASVs
## 68 400m.Apr ASV_count 977 Total ASVs
## 69 500m.Apr ASV_count 1045 Total ASVs
## 70 770m.Apr ASV_count 817 Total ASVs
## 71 5m.Aug ASV_count 787 Total ASVs
## 72 25m.Aug ASV_count 907 Total ASVs
## 73 45m.Aug ASV_count 904 Total ASVs
## 74 75m.Aug ASV_count 818 Total ASVs
## 75 100m.Aug ASV_count 713 Total ASVs
## 76 125m.Aug ASV_count 523 Total ASVs
## 77 150m.Aug ASV_count 636 Total ASVs
## 78 175m.Aug ASV_count 734 Total ASVs
## 79 300m.Aug ASV_count 584 Total ASVs
## 80 400m.Aug ASV_count 697 Total ASVs
## 81 500m.Aug ASV_count 668 Total ASVs
## 82 770m.Aug ASV_count 618 Total ASVs
## 83 5m.Dec2015 ASV_count 639 Total ASVs
## 84 25m.Dec2015 ASV_count 800 Total ASVs
## 85 45m.Dec2015 ASV_count 647 Total ASVs
## 86 75m.Dec2015 ASV_count 702 Total ASVs
## 87 100m.Dec2015 ASV_count 542 Total ASVs
## 88 125m.Dec2015 ASV_count 469 Total ASVs
## 89 150m.Dec2015 ASV_count 560 Total ASVs
## 90 175m.Dec2015 ASV_count 593 Total ASVs
## 91 300m.Dec2015 ASV_count 658 Total ASVs
## 92 400m.Dec2015 ASV_count 554 Total ASVs
## 93 500m.Dec2015 ASV_count 480 Total ASVs
## 94 770m.Dec2015 ASV_count 665 Total ASVs
## 95 5m.Dec2014 ASV_singleton 2 Breakdown of ASVs
## 96 25m.Dec2014 ASV_singleton 0 Breakdown of ASVs
## 97 45m.Dec2014 ASV_singleton 0 Breakdown of ASVs
## 98 75m.Dec2014 ASV_singleton 0 Breakdown of ASVs
## 99 100m.Dec2014 ASV_singleton 2 Breakdown of ASVs
## 100 125m.Dec2014 ASV_singleton 3 Breakdown of ASVs
## 101 150m.Dec2014 ASV_singleton 6 Breakdown of ASVs
## 102 175m.Dec2014 ASV_singleton 1 Breakdown of ASVs
## 103 300m.Dec2014 ASV_singleton 3 Breakdown of ASVs
## 104 400m.Dec2014 ASV_singleton 1 Breakdown of ASVs
## 105 500m.Dec2014 ASV_singleton 2 Breakdown of ASVs
## 106 770m.Dec2014 ASV_singleton 2 Breakdown of ASVs
## 107 5m.Apr ASV_singleton 0 Breakdown of ASVs
## 108 25m.Apr ASV_singleton 0 Breakdown of ASVs
## 109 75m.Apr ASV_singleton 2 Breakdown of ASVs
## 110 100m.Apr ASV_singleton 1 Breakdown of ASVs
## 111 125m.Apr ASV_singleton 1 Breakdown of ASVs
## 112 150m.Apr ASV_singleton 4 Breakdown of ASVs
## 113 175m.Apr ASV_singleton 2 Breakdown of ASVs
## 114 300m.Apr ASV_singleton 0 Breakdown of ASVs
## 115 400m.Apr ASV_singleton 2 Breakdown of ASVs
## 116 500m.Apr ASV_singleton 2 Breakdown of ASVs
## 117 770m.Apr ASV_singleton 0 Breakdown of ASVs
## 118 5m.Aug ASV_singleton 0 Breakdown of ASVs
## 119 25m.Aug ASV_singleton 1 Breakdown of ASVs
## 120 45m.Aug ASV_singleton 0 Breakdown of ASVs
## 121 75m.Aug ASV_singleton 1 Breakdown of ASVs
## 122 100m.Aug ASV_singleton 1 Breakdown of ASVs
## 123 125m.Aug ASV_singleton 1 Breakdown of ASVs
## 124 150m.Aug ASV_singleton 0 Breakdown of ASVs
## 125 175m.Aug ASV_singleton 2 Breakdown of ASVs
## 126 300m.Aug ASV_singleton 0 Breakdown of ASVs
## 127 400m.Aug ASV_singleton 1 Breakdown of ASVs
## 128 500m.Aug ASV_singleton 1 Breakdown of ASVs
## 129 770m.Aug ASV_singleton 0 Breakdown of ASVs
## 130 5m.Dec2015 ASV_singleton 0 Breakdown of ASVs
## 131 25m.Dec2015 ASV_singleton 0 Breakdown of ASVs
## 132 45m.Dec2015 ASV_singleton 1 Breakdown of ASVs
## 133 75m.Dec2015 ASV_singleton 1 Breakdown of ASVs
## 134 100m.Dec2015 ASV_singleton 2 Breakdown of ASVs
## 135 125m.Dec2015 ASV_singleton 4 Breakdown of ASVs
## 136 150m.Dec2015 ASV_singleton 3 Breakdown of ASVs
## 137 175m.Dec2015 ASV_singleton 2 Breakdown of ASVs
## 138 300m.Dec2015 ASV_singleton 2 Breakdown of ASVs
## 139 400m.Dec2015 ASV_singleton 1 Breakdown of ASVs
## 140 500m.Dec2015 ASV_singleton 0 Breakdown of ASVs
## 141 770m.Dec2015 ASV_singleton 1 Breakdown of ASVs
## 142 5m.Dec2014 ASV_doubleton 12 Breakdown of ASVs
## 143 25m.Dec2014 ASV_doubleton 13 Breakdown of ASVs
## 144 45m.Dec2014 ASV_doubleton 6 Breakdown of ASVs
## 145 75m.Dec2014 ASV_doubleton 10 Breakdown of ASVs
## 146 100m.Dec2014 ASV_doubleton 16 Breakdown of ASVs
## 147 125m.Dec2014 ASV_doubleton 10 Breakdown of ASVs
## 148 150m.Dec2014 ASV_doubleton 14 Breakdown of ASVs
## 149 175m.Dec2014 ASV_doubleton 9 Breakdown of ASVs
## 150 300m.Dec2014 ASV_doubleton 5 Breakdown of ASVs
## 151 400m.Dec2014 ASV_doubleton 19 Breakdown of ASVs
## 152 500m.Dec2014 ASV_doubleton 12 Breakdown of ASVs
## 153 770m.Dec2014 ASV_doubleton 8 Breakdown of ASVs
## 154 5m.Apr ASV_doubleton 17 Breakdown of ASVs
## 155 25m.Apr ASV_doubleton 19 Breakdown of ASVs
## 156 75m.Apr ASV_doubleton 32 Breakdown of ASVs
## 157 100m.Apr ASV_doubleton 46 Breakdown of ASVs
## 158 125m.Apr ASV_doubleton 26 Breakdown of ASVs
## 159 150m.Apr ASV_doubleton 34 Breakdown of ASVs
## 160 175m.Apr ASV_doubleton 25 Breakdown of ASVs
## 161 300m.Apr ASV_doubleton 31 Breakdown of ASVs
## 162 400m.Apr ASV_doubleton 37 Breakdown of ASVs
## 163 500m.Apr ASV_doubleton 40 Breakdown of ASVs
## 164 770m.Apr ASV_doubleton 30 Breakdown of ASVs
## 165 5m.Aug ASV_doubleton 1 Breakdown of ASVs
## 166 25m.Aug ASV_doubleton 4 Breakdown of ASVs
## 167 45m.Aug ASV_doubleton 5 Breakdown of ASVs
## 168 75m.Aug ASV_doubleton 1 Breakdown of ASVs
## 169 100m.Aug ASV_doubleton 26 Breakdown of ASVs
## 170 125m.Aug ASV_doubleton 22 Breakdown of ASVs
## 171 150m.Aug ASV_doubleton 24 Breakdown of ASVs
## 172 175m.Aug ASV_doubleton 19 Breakdown of ASVs
## 173 300m.Aug ASV_doubleton 11 Breakdown of ASVs
## 174 400m.Aug ASV_doubleton 19 Breakdown of ASVs
## 175 500m.Aug ASV_doubleton 15 Breakdown of ASVs
## 176 770m.Aug ASV_doubleton 6 Breakdown of ASVs
## 177 5m.Dec2015 ASV_doubleton 0 Breakdown of ASVs
## 178 25m.Dec2015 ASV_doubleton 9 Breakdown of ASVs
## 179 45m.Dec2015 ASV_doubleton 19 Breakdown of ASVs
## 180 75m.Dec2015 ASV_doubleton 28 Breakdown of ASVs
## 181 100m.Dec2015 ASV_doubleton 22 Breakdown of ASVs
## 182 125m.Dec2015 ASV_doubleton 15 Breakdown of ASVs
## 183 150m.Dec2015 ASV_doubleton 11 Breakdown of ASVs
## 184 175m.Dec2015 ASV_doubleton 21 Breakdown of ASVs
## 185 300m.Dec2015 ASV_doubleton 13 Breakdown of ASVs
## 186 400m.Dec2015 ASV_doubleton 16 Breakdown of ASVs
## 187 500m.Dec2015 ASV_doubleton 9 Breakdown of ASVs
## 188 770m.Dec2015 ASV_doubleton 14 Breakdown of ASVs
## 189 5m.Dec2014 ASV_else 754 Breakdown of ASVs
## 190 25m.Dec2014 ASV_else 669 Breakdown of ASVs
## 191 45m.Dec2014 ASV_else 602 Breakdown of ASVs
## 192 75m.Dec2014 ASV_else 770 Breakdown of ASVs
## 193 100m.Dec2014 ASV_else 728 Breakdown of ASVs
## 194 125m.Dec2014 ASV_else 645 Breakdown of ASVs
## 195 150m.Dec2014 ASV_else 797 Breakdown of ASVs
## 196 175m.Dec2014 ASV_else 542 Breakdown of ASVs
## 197 300m.Dec2014 ASV_else 1165 Breakdown of ASVs
## 198 400m.Dec2014 ASV_else 915 Breakdown of ASVs
## 199 500m.Dec2014 ASV_else 803 Breakdown of ASVs
## 200 770m.Dec2014 ASV_else 868 Breakdown of ASVs
## 201 5m.Apr ASV_else 631 Breakdown of ASVs
## 202 25m.Apr ASV_else 646 Breakdown of ASVs
## 203 75m.Apr ASV_else 713 Breakdown of ASVs
## 204 100m.Apr ASV_else 801 Breakdown of ASVs
## 205 125m.Apr ASV_else 700 Breakdown of ASVs
## 206 150m.Apr ASV_else 730 Breakdown of ASVs
## 207 175m.Apr ASV_else 659 Breakdown of ASVs
## 208 300m.Apr ASV_else 713 Breakdown of ASVs
## 209 400m.Apr ASV_else 938 Breakdown of ASVs
## 210 500m.Apr ASV_else 1003 Breakdown of ASVs
## 211 770m.Apr ASV_else 787 Breakdown of ASVs
## 212 5m.Aug ASV_else 786 Breakdown of ASVs
## 213 25m.Aug ASV_else 902 Breakdown of ASVs
## 214 45m.Aug ASV_else 899 Breakdown of ASVs
## 215 75m.Aug ASV_else 816 Breakdown of ASVs
## 216 100m.Aug ASV_else 686 Breakdown of ASVs
## 217 125m.Aug ASV_else 500 Breakdown of ASVs
## 218 150m.Aug ASV_else 612 Breakdown of ASVs
## 219 175m.Aug ASV_else 713 Breakdown of ASVs
## 220 300m.Aug ASV_else 573 Breakdown of ASVs
## 221 400m.Aug ASV_else 677 Breakdown of ASVs
## 222 500m.Aug ASV_else 652 Breakdown of ASVs
## 223 770m.Aug ASV_else 612 Breakdown of ASVs
## 224 5m.Dec2015 ASV_else 639 Breakdown of ASVs
## 225 25m.Dec2015 ASV_else 791 Breakdown of ASVs
## 226 45m.Dec2015 ASV_else 627 Breakdown of ASVs
## 227 75m.Dec2015 ASV_else 673 Breakdown of ASVs
## 228 100m.Dec2015 ASV_else 518 Breakdown of ASVs
## 229 125m.Dec2015 ASV_else 450 Breakdown of ASVs
## 230 150m.Dec2015 ASV_else 546 Breakdown of ASVs
## 231 175m.Dec2015 ASV_else 570 Breakdown of ASVs
## 232 300m.Dec2015 ASV_else 643 Breakdown of ASVs
## 233 400m.Dec2015 ASV_else 537 Breakdown of ASVs
## 234 500m.Dec2015 ASV_else 471 Breakdown of ASVs
## 235 770m.Dec2015 ASV_else 650 Breakdown of ASVs
#Basic bar plot
ASVbar_statistics <- ggplot(Melted, aes(x=samples, y=value, fill=variable))+geom_bar(stat="identity",position="stack",color="black") +theme_bw()+theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5,size=8),axis.text.y=element_text(size=12),legend.position = "top")
ASVbar_statistics

ASVbar_statistics %+% subset(Melted, Figure %in% "Breakdown of ASVs")+labs(title="Distribution of ASVs",x="Samples", y="Total ASVs")+scale_fill_manual(values=c("#e41a1c","#fee08b","#4393c3"))

ASVbar_statistics %+% subset(Melted, Figure %in% "Total ASVs")+labs(title="Total number of ASVs",x="Samples", y="Total ASVs")+scale_fill_manual(values=c("darkblue"))

ASVbar_statistics %+% subset(Melted, Figure %in% "Sequences")+labs(title="Total number of sequences",x="Samples", y="Total sequences")+scale_fill_manual(values=c("purple"))

############ Normalize using edgeR #############
#apply(ASV_table.no1[2:48],2,sum)
ListDGE = DGEList(Data_cols)
#ListDGE
ListDGE = calcNormFactors(ListDGE, method = "TMM")
#ListDGE
TMMNorm_ASV_DataCols = cpm(ListDGE)
#head(TMMNorm_ASV_DataCols)
TMMNorm_ASV_table = as.data.frame(TMMNorm_ASV_DataCols)
TMMNorm_ASV_table$ASV.ID = row.names(TMMNorm_ASV_table)
Joined<-join(TMMNorm_ASV_table, ASV_table.no1[c(1,49)], by="ASV.ID", type="left", match="first")
pr2_rename_taxa<-function(df){
split<-colsplit(df$Taxon, ";", c("Level1","Level2","Level3","Level4","Level5","Level6", "Level7","Level8","Level9", "Level10", "Level11", "Level12"))
split[ is.na(split) ] = "XXX"
split[ split == "" ] = "XXX"
split$Taxa<-"Other/unknown"
split$Taxa[split$Level1 == "No blast hit"]="No blast hit"
split$Taxa[split$Level1 == "Unassigned"]="Unassigned"
split$Taxa[split$Level1 == "None"]="None"
split$Taxa[split$Level2=="Amoebozoa"]="Amoebozoa"
split$Taxa[split$Level2=="Apusozoa"]="Other/unknown"
split$Taxa[split$Level2=="Eukaryota"]="Other/unknown"
split$Taxa[split$Level2=="Stramenopiles"]="Stramenopiles-Other"
split$Taxa[split$Level2=="Alveolata"]="Alveolates-Other"
split$Taxa[split$Level2=="Opisthokonta"]="Opisthokonts-Other"
split$Taxa[split$Level2=="Archaeplastida"]="Archaeplastids"
split$Taxa[split$Level2=="Excavata"]="Excavates"
split$Taxa[split$Level2=="Rhizaria"]="Rhizaria-Other"
split$Taxa[split$Level2=="Hacrobia"]="Hacrobia-other"
# split$Taxa[split$Level3=="Telonemia"]="Hacrobia-Telonemia"
split$Taxa[split$Level3=="Haptophyta"]="Hacrobia-Haptophytes"
split$Taxa[split$Level3=="Fungi"]="Opisthokont-Fungi"
split$Taxa[split$Level3=="Metazoa"]="Opisthokont-Metazoa"
split$Taxa[split$Level3=="Foraminifera"]="Rhizaria-Foraminifera"
split$Taxa[split$Level3=="Dinophyta"]="Dinoflagellates-other"
split$Taxa[split$Level4=="Syndiniales"]="Alveolates-Syndiniales"
split$Taxa[split$Level3=="Cryptophyta"]="Cryptophytes"
split$Taxa[split$Level3=="Ciliophora"]="Alveolates-Ciliates"
# split$Taxa[split$Level3=="Chlorophyta"]="Archaeplastids-Chlorophytes"
split$Taxa[split$Level3=="Cercozoa"]="Rhizaria-Cercozoa"
split$Taxa[split$Level3=="Radiolaria"]="Rhizaria-Radiolaria"
split$Taxa[split$Level3=="Ochrophyta"]="Stramenopiles-Ochrophyta"
split$Taxa[split$Level3=="Choanoflagellida"]="Opisthokont-Choanoflagellida"
# split$Taxa[split$Level3=="Katablepharidophyta"]="Hacrobia-Katablepharidophyta"
# split$Taxa[split$Level3=="Picozoa"]="Hacrobia-Picozoa"
# split$Taxa[split$Level4=="Acantharea"]="Radiolaria-Acantharia"
# split$Taxa[split$Level4=="Chrysophyceae-Synurophyceae"]="Stramenopiles-Chrysophytes"
split$Taxa[split$Level4=="Pelagophyceae"]="Stramenopiles-Pelagophytes"
split$Taxa[split$Level4=="Bacillariophyta"]="Stramenopiles-Diatoms"
split$Taxa[split$Level4=="MAST"]="Stramenopiles-MAST"
# split$Taxa[split$Level4=="Polycystinea"]="Rhizaria-Polycystines" #Leave out of the main bar plot. its supposed to be an overview.
split$Taxa[split$Level4=="Dinophyceae"]="Alveolates-Dinophyceae"
# split$Taxa[split$Level4=="Labyrinthulea"]="Stramenopiles-Labyrinthulea" #Only include in the bubble plot or whatever will be high resolution. These parasites are important because they show up in the deep.
# split$Taxa[split$Level4=="Prymnesiophyceae"]="Haptophyta-Prymnesiophyceae" #Leave this one out because there is nothing more to gain by pulling it out other than just saying there is micro diversity
# split$Taxa[split$Level4=="Chrysophyceae"]="Stramenopiles-Chrysophyceae"
# split$Taxa[split$Level4=="Dictyochophyceae"]="Stramenopiles-Dictyochophyceae"
# split$Taxa[split$Level4=="Spirotrichea"]="Spirotrichea"
# split$Taxa[split$Level4=="Oligohymenophorea"]="Oligohymenophorea"
# split$Taxa[split$Level4=="Litostomatea"]="Litostomatea"
# split$Taxa[split$Level4=="Phyllopharyngea"]="Phyllopharyngea"
# split$Taxa[split$Level6=="Pedinellales"]="Stramenopiles-Pedinellales"
# split$Taxa[split$Level6=="Pterocoryothoidea"]="Radiolaria-Pterocoryothoidea" #Overlap with other radiolarians. If it comes down to it, i can show how much acantharia is there, but with so few radiolarians in the database it may not be worth listing these two, and a large 'other'
# split$Taxa[split$Level7=="Gymnodinium"]="Dinophyceae-Gymnodinium"
# split$Taxa[split$Level7=="Prorocentrum"]="Dinophyceae-Prorocentrum"
# split$Taxa[split$Level7=="Gyrodinium"]="Dinophyceae-Gyrodinium" #Seems redundant. If I find information supporting that they are distinct from gymnodinium...
# split$Taxa[split$Level7=="Neoceratium"]="Dinophyceae-Neoceratium"
# split$Taxa[split$Level7=="Protoperidinium"]="Dinophyceae-Protoperidinium"
return(split)
}
NT = pr2_rename_taxa(Joined)
DBin = data.frame(Joined, NT)
cnames = c("ASV.ID","5m Dec2014","25m Dec2014","45m Dec2014","75m Dec2014","100m Dec2014","125m Dec2014","155m Dec2014","175m Dec2014","300m Dec2014","400m Dec2014","500m Dec2014","770m Dec2014","5m Apr2015","25m Apr2015","75m Apr2015","100m Apr2015","125m Apr2015","150m Apr2015","175m Apr2015","300m Apr2015","400m Apr2015","500m Apr2015","770m Apr2015","5m Aug2015","25m Aug2015","45m Aug2015","75m Aug2015","100m Aug2015","125m Aug2015","150m Aug2015","175m Aug2015","300m Aug2015","400m Aug2015","500m Aug2015","770m Aug2015","5m Dec2015","25m Dec2015","45m Dec2015","75m Dec2015","100m Dec2015","125m Dec2015","150m Dec2015","175m Dec2015","300m Dec2015","400m Dec2015","500m Dec2015","770m Dec2015","Taxonomy","Level1","Level2","Level3","Level4","Level5","Level6","Level7","Level8","Level9","Level10","Level11","Level12","Taxa")
colnames(DBin) = cnames
#Keep in mind that there are a bunch of ASVs in Metazoa
#DBin = DBin[DBin$Taxa != "Stramenopiles-Pelagophytes",]
#write.csv(DBin, file = "HOT_for_Krona.csv") # output table for constructing Krona plots
################# Figures S4 & 2a Whole Dataset Ordination plots (NMDS & Cluster Dendrogram)
DBin_nums_only = DBin[,2:48]
DBin_relabund = decostand(DBin_nums_only, MARGIN = 2, method = "total") #convert to relative abundances
DBin_relabund.t = t(DBin_relabund) #convert to wide format
cluster<-hclust(dist(DBin_relabund.t), method="average") #Hierarchial clustering of analysis using euclidean distances
FigureS4 = plot(cluster)

######### Figure 2a
# Calculate MDS with Bray-Curtis dissimilarity matrix
NMDS.bray=metaMDS(DBin_relabund.t,distance="bray",k=2)
## Run 0 stress 0.0659
## Run 1 stress 0.0659
## ... New best solution
## ... Procrustes: rmse 0.000192 max resid 0.0004533
## ... Similar to previous best
## Run 2 stress 0.07009
## Run 3 stress 0.06918
## Run 4 stress 0.07009
## Run 5 stress 0.07119
## Run 6 stress 0.07684
## Run 7 stress 0.07119
## Run 8 stress 0.07823
## Run 9 stress 0.07008
## Run 10 stress 0.07397
## Run 11 stress 0.06674
## Run 12 stress 0.06918
## Run 13 stress 0.06918
## Run 14 stress 0.07247
## Run 15 stress 0.07141
## Run 16 stress 0.07009
## Run 17 stress 0.07009
## Run 18 stress 0.0659
## ... New best solution
## ... Procrustes: rmse 0.000154 max resid 0.0004159
## ... Similar to previous best
## Run 19 stress 0.06943
## Run 20 stress 0.07084
## *** Solution reached
# Plot extracted MDS points from each sample using ggplot2
NMDS.bray_pts<-NMDS.bray$points[1:nrow(NMDS.bray$points),]
NMDS.bray_pts<-as.data.frame(NMDS.bray_pts)
NMDS.bray_pts$Sample<-row.names(NMDS.bray_pts)
NMDS.bray_pts$Sample=factor(NMDS.bray_pts$Sample, levels = NMDS.bray_pts$Sample)
NMDS.bray_plot <- ggplot(NMDS.bray_pts, aes(x = MDS1, y = MDS2, shape=Sample, fill=Sample), color="black") +
geom_point(size=3,aes(fill=Sample)) +
theme_bw() +
scale_shape_manual(values = c(21,21,21,21,21,21,21,21,21,21,21,21,24,24,24,24,24,24,24,24,24,24,24,23,23,23,23,23,23,23,23,23,23,23,23,22,22,22,22,22,22,22,22,22,22,22,22,22,22))+scale_fill_manual(values=c("#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928"))
NMDS.bray_plot

#### Table1: One-way-ANOVA of MDS 1&2
#This one is a cool lick that matt wrote, and probably uses commonly to isolate pieces of larger data strings.
#sapply takes a variable, X, and then runs a function on it. Here the variable is the result of splitting the depth and month
#in two. The function basically prints the first column,the depth, of the two.
depth<-sapply(strsplit(as.character(NMDS.bray_pts$Sample), ' '), FUN=function(x){x[1]})
#This lick converts the depths, which are currently strings because of the letter, to integers (to do some math with)
#the input is the result of substituting all m's with a blank, sorta like what we do with sed.
depth<-as.integer(gsub(pattern='m', replacement='', x=depth))
#repeated for the months.
month<-sapply(strsplit(as.character(NMDS.bray_pts$Sample), ' '), FUN=function(x){x[2]})
#here we conduct an anova comparing the points along the axis and the depths and months.
#for some reason we get different results when looking at the month and depth simultaneously.
anova(lm(NMDS.bray_pts$MDS1~month*depth))
## Analysis of Variance Table
##
## Response: NMDS.bray_pts$MDS1
## Df Sum Sq Mean Sq F value Pr(>F)
## month 3 0.2 0.1 0.18 0.91
## depth 1 42.4 42.4 148.24 0.0000000000000073 ***
## month:depth 3 0.1 0.0 0.13 0.94
## Residuals 39 11.2 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Below examples illustrate that according to MDS1, depth expains all of the variation.
#ANOVA on MDS1
anova(lm(NMDS.bray_pts$MDS1~month))
## Analysis of Variance Table
##
## Response: NMDS.bray_pts$MDS1
## Df Sum Sq Mean Sq F value Pr(>F)
## month 3 0.2 0.051 0.04 0.99
## Residuals 43 53.7 1.249
anova(lm(NMDS.bray_pts$MDS1~depth))
## Analysis of Variance Table
##
## Response: NMDS.bray_pts$MDS1
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 42.4 42.4 167 <0.0000000000000002 ***
## Residuals 45 11.5 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Repeat for MDS2
anova(lm(NMDS.bray_pts$MDS2~month))
## Analysis of Variance Table
##
## Response: NMDS.bray_pts$MDS2
## Df Sum Sq Mean Sq F value Pr(>F)
## month 3 0.38 0.126 0.69 0.56
## Residuals 43 7.82 0.182
anova(lm(NMDS.bray_pts$MDS2~depth))
## Analysis of Variance Table
##
## Response: NMDS.bray_pts$MDS2
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 1.1 1.096 6.95 0.011 *
## Residuals 45 7.1 0.158
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#######Figure 1: Multi-Seasonal, Low resolution/ major protistan group taxa barplot
DBin_nometa = DBin[DBin$Taxa != "Opisthokont-Metazoa",] #Remove metazoan ASVs (optional)
data.m = melt(DBin_nometa)
## Using ASV.ID, Taxonomy, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9, Level10, Level11, Level12, Taxa as id variables
data.agg<-aggregate(data.m$value, by=list(Taxa=data.m$Taxa,Samples=data.m$variable),sum) #sum sequences by taxonomic group
# save(data.agg, data.m, data_binned, file="Checkpoint1.Rdata")
# Make the order and colors
tax_order=c("Alveolates-Ciliates","Dinoflagellates-other","Alveolates-Dinophyceae","Alveolates-Syndiniales","Amoebozoa","Excavates","Rhizaria-other","Rhizaria-Radiolaria","Rhizaria-Cercozoa","Rhizaria-Foramnifera","Archaeplastids","Cryptophytes", "Stramenopiles-Ochrophyta","Stramenopiles-Diatoms","Stramenopiles-Pelagophytes","Stramenopiles-Other","Stramenopiles-MAST", "Hacrobia-other","Hacrobia-Haptophytes","Opisthokont-Choanoflagellida","Opisthokont-Fungi","Opisthokonts-Other","Other/unknown")
tax_color = c("#7f0000","#b30000","#e31a1c","#ec7014", "#8c510a","#dfc27d", "#08306b","#08519c","#6baed6","#7fcdbb", "#e7298a","#ffff99", "#e5f5e0","#a1d99b","#41ab5d","#006d2c","#00441b", "#8c510a","#f6e8c3", "#d8daeb","#b2abd2","#542788","#2d004b","#f7f7f7")
names(tax_color)<-tax_order
data.agg$Taxa<-factor(data.agg$Taxa, levels=rev(tax_order)) #factoring to keep the order
#Bar plot of community composition
Relative_abund = ggplot(data.agg[order(data.agg$Taxa),], aes(y=x,fill=tax,x=Samples))+
geom_bar(position = "fill", stat = "identity", color="black",aes(fill=Taxa))+
scale_fill_manual(values=tax_color)+
labs(title="", x="",y="Relative abundance of reads")+
theme_bw()+
theme(legend.position="right",axis.text.x = element_text(angle=45, hjust=1,vjust=1,color="black"))
Relative_abund

################# Concatenate Depths into Three Depth Zones ####################
#Concatinate the seasons
taxa_info = DBin[,49:62]
ASV.ID = DBin$ASV.ID
m5 = apply(DBin[,c(2,14,25,37)],1,mean)
m25 = apply(DBin[,c(3,15,26,38)],1,mean)
m45 = apply(DBin[,c(4,27,39)],1,mean)
m75 = apply(DBin[,c(5,16,28,40)],1,mean)
m100 = apply(DBin[,c(6,17,29,41)],1,mean)
m125 = apply(DBin[,c(7,18,30,42)],1,mean)
m150 = apply(DBin[,c(8,19,31,43)],1,mean)
m175 = apply(DBin[,c(9,20,32,44)],1,mean)
m300 = apply(DBin[,c(10,21,33,45)],1,mean)
m400 = apply(DBin[,c(11,22,34,46)],1,mean)
m500 = apply(DBin[,c(12,23,35,47)],1,mean)
m770 = apply(DBin[,c(13,24,36,48)],1,mean)
Seasons_merged = data.frame(ASV.ID, m5,m25,m45,m75,m100,m125,m150,m175,m300,m400,m500,m770,taxa_info)
#dim(Seasons_merged)
#head(Seasons_merged)
merged.colnames = c("5m","25m","45m","75m","100m","125m","150m","175m","300m","400m","500m","770m")
################### Figure 4a Hierarchial cluster combined data #################
###This is a check to see if the pattern holds.
Seasons_merged_numers = Seasons_merged[,2:13]
colnames(Seasons_merged_numers)=merged.colnames
Season_relabundance = decostand(Seasons_merged_numers, MARGIN = 2, method = "total")
Season_relabundance.t = t(Season_relabundance)
Zonescluster=hclust(dist(Season_relabundance.t), method="average")
Merged_cluster = plot(Zonescluster)

############## Figure 5a-j Individual Taxa profiles ############
##Surface Dwellers##
############## Zones and Individual depth & Shannon, inv-simpson (Alpha Diversity), and ASV count ############
#First isolate only numeric columns
#Zones
Sufeuph_DatCols = Seasons_merged[,2:5]
SubSufeuph_DatCols = Seasons_merged[,6:9]
Subeuph_DatCols = Seasons_merged[,10:13]
#Individual
All_DataCols = Seasons_merged[,2:13]
#renaming the columns to have numbers only for conveinent coding.
colnames(Sufeuph_DatCols)=c("1","2","3","4")
colnames(SubSufeuph_DatCols)=c("1","2","3","4")
colnames(Subeuph_DatCols)=c("1","2","3","4")
colnames(All_DataCols)=c("5","25","45","75","100","125","150","175","300","400","500","770")
#Calculate Shannon diversity indicees
Sufeuph_Shannon = diversity(Sufeuph_DatCols[1:4], index = "shannon",2)
SubSufeuph_Shannon = diversity(SubSufeuph_DatCols[1:4], index = "shannon",2)
Subeuph_Shannon = diversity(Subeuph_DatCols[1:4], index = "shannon",2)
All_Shannon = diversity(All_DataCols[1:12], index = "shannon",2)
Shannon_bound = data.frame(Sufeuph_Shannon,SubSufeuph_Shannon,Subeuph_Shannon)
ShBd.M = melt(Shannon_bound)
## No id variables; using all as measure variables
#ggplot(ShBd.M, aes(x=variable, y=value, colour = variable)) + geom_violin() + geom_boxplot(width = .1, colour = "black", outlier.colour = "black")
ggplot(ShBd.M, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

#Repeat with inv simpson (Figure 2b)
Sufeuph_Sim = diversity(Sufeuph_DatCols[1:4], index = "invsimpson",2)
SubSufeuph_Sim = diversity(SubSufeuph_DatCols[1:4], index = "invsimpson",2)
Subeuph_Sim = diversity(Subeuph_DatCols[1:4], index = "invsimpson",2)
All_Sim = diversity(All_DataCols[1:12], index = "invsimpson",2)
Sim_bound = data.frame(Sufeuph_Sim,SubSufeuph_Sim,Subeuph_Sim)
SiBd.M = melt(Sim_bound)
## No id variables; using all as measure variables
ggplot(SiBd.M, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

############## Figure 2c Calculate ASV total counts #################
ASVCount.S <- colSums(Sufeuph_DatCols>0)
ASVCount.M <- colSums(SubSufeuph_DatCols>0)
ASVCount.D <- colSums(Subeuph_DatCols>0)
All_ASVCount <- colSums(All_DataCols>0)
ASVCount_DF = data.frame(ASVCount.S,ASVCount.M,ASVCount.D)
ASVC.Melted = melt(ASVCount_DF)
## No id variables; using all as measure variables
#boxplot it
ggplot(ASVC.Melted, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

#violin plot it
#ggplot(ASVC.Melted, aes(x=variable, y=value, colour = variable)) + geom_violin() + geom_boxplot(width = .1, colour = "black", outlier.colour = "black")
######## Finally plot all discrete depths alpha diversity
#All depths melted plot
All_Datafram = data.frame(All_Shannon,All_Sim,All_ASVCount)
All_Datafram$Depth = c("5","25","45","75","100","125","150","175","300","400","500","770")
All_Datafram$Depth = factor(All_Datafram$Depth, levels = All_Datafram$Depth)
All_Datafram.M = melt(All_Datafram)
## Using Depth as id variables
#head(All_Datafram.M)
ggplot(All_Datafram.M, aes(x=Depth, y=value, fill=variable, shape=variable))+geom_point(size=3, aes(color=variable))+facet_grid(variable~.,scales="free")+theme_bw()+theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5,size=8),axis.text.y=element_text(size=12),legend.position = "top")

#write.csv(Seasons_merged, file = "Seasons_merged.csv") #curate with krona excel - https://github.com/marbl/Krona/wiki
############## BALOON PLOTS ####
###### Figure 6 Syndiniales ASV distribution
Syndiniales_ASV_table = Seasons_merged[Seasons_merged$Level4 == "Syndiniales",]
ASV_totals = apply(Syndiniales_ASV_table[2:13],1,sum)
Abundant_ASVs = ASV_totals[ASV_totals>15000]
sequence.variant1 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "407ae8e50f14b28b4ee9d2a916027f36",2:13],2,sum)
sequence.variant2 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "76079a7c02538b445611d90603dcdd84",2:13],2,sum)
sequence.variant3 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "41b1fc47277b0f91d2057475fd198356",2:13],2,sum)
sequence.variant4 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "0710f08b3fd6a82916211d4b4ac6c520",2:13],2,sum)
sequence.variant5 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "68b1207a2a740016122faaa91c530cd5",2:13],2,sum)
sequence.variant6 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "e269879576d49f0de9fa24a496e8c480",2:13],2,sum)
sequence.variant7 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "fad8d44de4b6cb7bf9307c2106c2f4f0",2:13],2,sum)
sequence.variant8 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "0146dde243b672179d69288c4bc367b4",2:13],2,sum)
sequence.variant9 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "73af8b0a55f3a6bbf16518ac8655f63a",2:13],2,sum)
sequence.variant10 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "4f9ba8b4deacee7453670af0438970f1",2:13],2,sum)
sequence.variant11 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "dcfecdeedead18507c1eeec9ef4f50f9",2:13],2,sum)
Synd_DF = data.frame(sequence.variant1,sequence.variant2,sequence.variant3,sequence.variant4,sequence.variant5,sequence.variant6,sequence.variant7,sequence.variant8,sequence.variant9,sequence.variant10,sequence.variant11)
Syndf = cbind(rownames(Synd_DF), Synd_DF) # add the sample column to the dataset
names(Syndf)[1] = "sample"
Syndf$sample=factor(Syndf$sample, levels = Syndf$sample) #Cool trick to keep the order of samples in the plot.
Syndf.m = melt(Syndf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Synd_baloon = ggplot(Syndf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=12) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Synd_baloon

#### Figure S3b diatom.diversity
Diatom.Seasons_merged = Seasons_merged[Seasons_merged$Level4 == "Bacillariophyta",]
diatomlist = Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 %in% unique(Diatom.Seasons_merged$Level7),]
unique(diatomlist$Level7) #The diatom species
## [1] "Pseudo-nitzschia" "XXX"
## [3] "Chaetoceros" "Thalassiosira"
## [5] "Polar-centric-Mediophyceae_X" "Guinardia"
## [7] "Raphid-pennate_X" "Cylindrotheca"
## [9] "Pleurosigma" "Nitzschia"
## [11] "Asteromphalus" "Minutocellus"
## [13] "Hemiaulus" "Actinocyclus"
## [15] "Stellarima" "Navicula"
## [17] "Corethron" "Cerataulina"
nrow(Diatom.Seasons_merged) #Number of diatom ASVs
## [1] 322
#Sum the number of reads for each diatom species
Pnitz = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Pseudo-nitzschia",2:13],2,sum)
Chaetoceros = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Chaetoceros",2:13],2,sum)
Mediophyceae = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Mediophyceae",2:13],2,sum)
pennate = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Raphid-pennate",2:13],2,sum)
Pleurosigma = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Pleurosigma",2:13],2,sum)
Hemiaulus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Hemiaulus",2:13],2,sum)
Stellarima = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Stellarima",2:13],2,sum)
Corethron = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Corethron",2:13],2,sum)
Thalassiosira = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Thalassiosira",2:13],2,sum)
Guinardia = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Guinardia",2:13],2,sum)
Cylindrotheca = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Cylindrotheca",2:13],2,sum)
Nitzschia = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Nitzschia",2:13],2,sum)
Minutocellus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Minutocellus",2:13],2,sum)
Actinocyclus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Actinocyclus",2:13],2,sum)
Navicula = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Navicula",2:13],2,sum)
Cerataulina = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Cerataulina",2:13],2,sum)
unknown = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "XXX",2:13],2,sum)
Diatom.dataframe = data.frame(unknown,Pnitz,Chaetoceros,Mediophyceae,pennate,Pleurosigma,Hemiaulus,Stellarima,Corethron,Thalassiosira,Guinardia,Cylindrotheca,Nitzschia,Minutocellus,Actinocyclus,Navicula,Cerataulina)
Diatdf = cbind(rownames(Diatom.dataframe), Diatom.dataframe) # add the sample column to the dataset
names(Diatdf)[1] = "sample"
Diatdf$sample=factor(Diatdf$sample, levels = Diatdf$sample) #Cool trick to keep the order of samples in the plot.
Diatdf.m = melt(Diatdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Diatoms_baloon = ggplot(Diatdf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=10) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="red"))
Diatoms_baloon

### Figure S3a Dinophyceae
Dinophyceae.Seasons_merged = Seasons_merged[Seasons_merged$Level4 == "Dinophyceae",]
unique(Dinophyceae.Seasons_merged$Level7) #dinoflagellate species
## [1] "Dinophyceae_XXX" "XXX" "Prorocentrum" "Protoperidinium"
## [5] "Blastodinium" "Gyrodinium" "Gymnodinium" "Cochlodinium"
## [9] "Heterocapsa" "Alexandrium" "Karlodinium" "Phalacroma"
## [13] "Takayama" "Noctiluca" "Kofoidinium" "Warnowia"
## [17] "Woloszynskia" "Gonyaulax" "Lepidodinium" "Neoceratium"
## [21] "Lingulodinium" "Abedinium" "Suessiales_XX" "Protoceratium"
## [25] "Archaeperidinium" "Symbiodinium" "Pyrodinium"
nrow(Dinophyceae.Seasons_merged) #number of dinoflagellate ASVs
## [1] 1449
#Sum species
Unk.dino = (apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "XXX",2:13],2,sum) + apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Dinophyceae_XXX",2:13],2,sum))
Blastodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Blastodinium",2:13],2,sum)
Heterocapsa = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Heterocapsa",2:13],2,sum)
Takayama = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Takayama",2:13],2,sum)
Woloszynskia = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Woloszynskia",2:13],2,sum)
Lingulodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Lingulodinium",2:13],2,sum)
Archaeperidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Archaeperidinium",2:13],2,sum)
Gyrodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gyrodinium",2:13],2,sum)
Alexandrium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Alexandrium",2:13],2,sum)
Noctiluca = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Noctiluca",2:13],2,sum)
Gonyaulax = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gonyaulax",2:13],2,sum)
Abedinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Abedinium",2:13],2,sum)
Symbiodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Symbiodinium",2:13],2,sum)
Prorocentrum = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Prorocentrum",2:13],2,sum)
Gymnodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gymnodinium",2:13],2,sum)
Kofoidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Kofoidinium",2:13],2,sum)
Lepidodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Lepidodinium",2:13],2,sum)
Suessiales = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Suessiales_XX",2:13],2,sum)
Pyrodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Pyrodinium",2:13],2,sum)
Protoperidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Protoperidinium",2:13],2,sum)
Cochlodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Cochlodinium",2:13],2,sum)
Phalacroma = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Phalacroma",2:13],2,sum)
Warnowia = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Warnowia",2:13],2,sum)
Neoceratium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Neoceratium",2:13],2,sum)
Protoceratium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Protoceratium",2:13],2,sum)
Dinophyceae.dataframe = data.frame(Unk.dino,Blastodinium,Heterocapsa,Takayama,Woloszynskia,Lingulodinium,Archaeperidinium,Gyrodinium,Alexandrium,Noctiluca,Gonyaulax,Abedinium,Symbiodinium,Prorocentrum,Gymnodinium,Kofoidinium,Lepidodinium,Suessiales,Pyrodinium,Protoperidinium,Cochlodinium,Phalacroma,Warnowia,Neoceratium,Protoceratium,Diatom.dataframe)
Dinodf = cbind(rownames(Dinophyceae.dataframe), Dinophyceae.dataframe) # add the sample column to the dataset
names(Dinodf)[1] = "sample"
Dinodf$sample=factor(Dinodf$sample, levels = Dinodf$sample) #Cool trick to keep the order of samples in the plot.
Dinodf.m = melt(Dinodf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Dinophyceae_baloon = ggplot(Dinodf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=10) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="red"))
Dinophyceae_baloon

# Dinoflagellate read counts and proportions
Dinosum = sum(colSums(Dinophyceae.dataframe)) #Number of dinoflagellate reads
big3_readcount=sum(Dinophyceae.dataframe$Gyrodinium)+sum(Dinophyceae.dataframe$Prorocentrum)+sum(Dinophyceae.dataframe$Gymnodinium)
big3_readcount/Dinosum
## [1] 0.3185
##### Figure S2 Mast, Haptophyte, Radiolaria, Ciliate distributions
###### MAST distribution #####
MAST.3 = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-3I",2:13],2,sum)
MAST.1C = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-1C",2:13],2,sum)
MAST.7 = apply(Seasons_merged[Seasons_merged$Level5 == "MAST-7",2:13],2,sum)
MAST.25_X = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-25_X",2:13],2,sum)
MAST.3K = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-3K",2:13],2,sum)
Mast_DF = data.frame(MAST.3,MAST.1C,MAST.7,MAST.25_X,MAST.3K)
Mastdf = cbind(rownames(Mast_DF), Mast_DF) # add the sample column to the dataset
names(Mastdf)[1] = "sample"
Mastdf$sample=factor(Mastdf$sample, levels = Mastdf$sample) #Cool trick to keep the order of samples in the plot.
Mastdf.m = melt(Mastdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Mastdf_baloon = ggplot(Mastdf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=12) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Mastdf_baloon

##### Figure S2 Haptophytes ########
Haptophytes.Seasons_merged = Seasons_merged[Seasons_merged$Level3 == "Haptophyta",]
unique(Haptophytes.Seasons_merged$Level5)
## [1] "XXX" "Prymnesiales"
## [3] "Haptophyta_Clade_HAP3_X" "Haptophyta_Clade_HAP4_X"
## [5] "Haptophyta_XX" "Haptophyta_Clade_HAP2_X"
## [7] "Prymnesiophyceae_Clade_E" "Phaeocystales"
## [9] "Prymnesiophyceae_X" "Prymnesiophyceae_Clade_D"
## [11] "Isochrysidales" "Coccolithales"
unique(Haptophytes.Seasons_merged$Level6)
## [1] "XXX" "Chrysochromulinaceae"
## [3] "Haptophyta_Clade_HAP3_XX" "Haptophyta_Clade_HAP4_XX"
## [5] "Prymnesiophyceae_Clade_B3" "Haptophyta_XXX"
## [7] "Prymnesiaceae" "Haptophyta_Clade_HAP2_XX"
## [9] "Prymnesiophyceae_Clade_E_X" "Phaeocystaceae"
## [11] "Braarudosphaeraceae" "Prymnesiophyceae_Clade_D_X"
## [13] "Prymnesiophyceae_Clade_B4" "Noelaerhabdaceae"
## [15] "Chrysotilaceae" "Prymnesiophyceae_XX"
## [17] "Calyptrosphaeraceae"
nrow(Haptophytes.Seasons_merged) #haptophyte ASVs 423
## [1] 423
#Sum Species
Prymnesiales = apply(Seasons_merged[Seasons_merged$Level5 == "Prymnesiales",2:13],2,sum)
Phaeocystis = apply(Seasons_merged[Seasons_merged$Level5 == "Phaeocystales",2:13],2,sum)
Hapto.other = apply(Seasons_merged[Seasons_merged$Level3 == "Haptophyta",2:13],2,sum)
Hap4 = apply(Seasons_merged[Seasons_merged$Level5 == "Haptophyta_Clade_HAP4_X",2:13],2,sum)
PrymnClade_D = apply(Seasons_merged[Seasons_merged$Level5 == "Prymnesiophyceae_Clade_D",2:13],2,sum)
Cocco = apply(Seasons_merged[Seasons_merged$Level5 == "Coccolithales",2:13],2,sum)
Clade.B3 = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiophyceae_Clade_B3",2:13],2,sum)
Clade.EX = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiophyceae_Clade_E_X",2:13],2,sum)
Prymnesiaceae = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiaceae",2:13],2,sum)
Chrysochromulinaceae = apply(Seasons_merged[Seasons_merged$Level6 == "Chrysochromulinaceae",2:13],2,sum)
Hapto_DF = data.frame(Hapto.other,Chrysochromulinaceae,Phaeocystis,Hap4,PrymnClade_D,Cocco,Clade.B3,Clade.EX,Prymnesiaceae)
Hapdf = cbind(rownames(Hapto_DF), Hapto_DF) # add the sample column to the dataset
names(Hapdf)[1] = "sample"
Hapdf$sample=factor(Hapdf$sample, levels = Hapdf$sample) #Cool trick to keep the order of samples in the plot.
Hapdf.m = melt(Hapdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Hapto_baloon = ggplot(Hapdf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Hapto_baloon

###### Figure S2 Radiolaria ######
RadMerged_Table = Seasons_merged[Seasons_merged$Level3 == "Radiolaria",]
Radb_Merged_Table = RadMerged_Table[RadMerged_Table$Level4 == "RAD-B",]
Acanth_Merged_Table = RadMerged_Table[RadMerged_Table$Level4 == "Acantharea",]
length(unique(RadMerged_Table$ASV.ID))
## [1] 1269
uniques.radiolaria = unique(RadMerged_Table$Level4)
#Sum species reads
Rad.Other = apply(Seasons_merged[Seasons_merged$Level4 == "XXX",2:13],2,sum)
Rad.Other = Rad.Other + apply(Seasons_merged[Seasons_merged$Level4 == "Radiolaria_X",2:13],2,sum)
Acantharia = apply(Seasons_merged[Seasons_merged$Level4 == "Acantharea",2:13],2,sum)
RAD.A = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-A",2:13],2,sum)
RAD.B = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-B",2:13],2,sum)
RAD.C = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-C",2:13],2,sum)
Polycyst = apply(Seasons_merged[Seasons_merged$Level4 == "Polycystinea",2:13],2,sum)
Radiolaria_DF = data.frame(Acantharia,RAD.A,RAD.B,RAD.C,Polycyst,Rad.Other)
Rdf = cbind(rownames(Radiolaria_DF), Radiolaria_DF)
names(Rdf)[1] = "sample"
Rdf$sample=factor(Rdf$sample, levels = Rdf$sample) #Cool trick to keep the order of samples in the plot.
Rdf.m = melt(Rdf)
## Using sample as id variables
#Baloon plot it
Rad_baloon = ggplot(Rdf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Rad_baloon

######## Figure S2 Ciliates ##
# Make a baloon plot from which we can extrapolate the distribution patterns of the following ciliate subclasses:
# Spirotrichea, Prostomatea, Colpodea, Litostomatea, Phyllopharyngea, Oligohymenophorea, and a group called Ciliate_others
CiliateMerged_Table = Seasons_merged[Seasons_merged$Level3 == "Ciliophora",]
#Sum number of reads
Spirotrich = apply(Seasons_merged[Seasons_merged$Level4 == "Spirotrichea",2:13],2,sum)
Phyllopharyngea = apply(Seasons_merged[Seasons_merged$Level4 == "Phyllopharyngea",2:13],2,sum)
Oligohym = apply(Seasons_merged[Seasons_merged$Level4 == "Oligohymenophorea",2:13],2,sum)
Litostome = apply(Seasons_merged[Seasons_merged$Level4 == "Litostomatea",2:13],2,sum)
Colpodea = apply(Seasons_merged[Seasons_merged$Level4 == "Colpodea",2:13],2,sum)
Prostomatea = apply(Seasons_merged[Seasons_merged$Level4 == "Prostomatea",2:13],2,sum)
Other.Ciliate = apply(Seasons_merged[Seasons_merged$Level4 == "XXX",2:13],2,sum)
Ciliate_DF = data.frame(Spirotrich,Phyllopharyngea,Oligohym,Litostome,Colpodea,Prostomatea,Other.Ciliate)
Cdf = cbind(rownames(Ciliate_DF), Ciliate_DF)
names(Cdf)[1] = "sample"
Cdf$sample=factor(Cdf$sample, levels = Cdf$sample) #Cool trick to keep the order of samples in the plot.
Cdf.m = melt(Cdf)
## Using sample as id variables
#Baloon plot it
Cili_baloon = ggplot(Cdf.m, aes(x=sample, y=variable)) +
geom_point(aes(size=value, fill=factor(variable)), shape=21) +
scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Cili_baloon

########### Figure 3 #####
#making upsetR plot
# 1. Read in an ASV table
Seasons_merged_Upset = Seasons_merged
# 2. Make binary columns using if else statement
Seasons_merged_Upset$binary_5m = ifelse(Seasons_merged_Upset$m5>10,1,0)
Seasons_merged_Upset$binary_25m = ifelse(Seasons_merged_Upset$m25>10,1,0)
Seasons_merged_Upset$binary_45m = ifelse(Seasons_merged_Upset$m45>10,1,0)
Seasons_merged_Upset$binary_75m = ifelse(Seasons_merged_Upset$m75>10,1,0)
Seasons_merged_Upset$binary_100m = ifelse(Seasons_merged_Upset$m100>10,1,0)
Seasons_merged_Upset$binary_125m = ifelse(Seasons_merged_Upset$m125>10,1,0)
Seasons_merged_Upset$binary_150m = ifelse(Seasons_merged_Upset$m150>10,1,0)
Seasons_merged_Upset$binary_175m = ifelse(Seasons_merged_Upset$m175>10,1,0)
Seasons_merged_Upset$binary_300m = ifelse(Seasons_merged_Upset$m300>10,1,0)
Seasons_merged_Upset$binary_400m = ifelse(Seasons_merged_Upset$m400>10,1,0)
Seasons_merged_Upset$binary_500m = ifelse(Seasons_merged_Upset$m500>10,1,0)
Seasons_merged_Upset$binary_770m = ifelse(Seasons_merged_Upset$m770>10,1,0)
#Plot using upsetR
upset(Seasons_merged_Upset, keep.order = TRUE, sets = c("binary_770m","binary_500m","binary_400m","binary_300m","binary_175m","binary_150m","binary_125m","binary_100m","binary_75m","binary_45m","binary_25m","binary_5m") , mainbar.y.label = "Number of ASVs", text.scale = 1.5)
